fix grapheme test to work on unmodified data file
author Steven G. Johnson <stevenj@alum.mit.edu>
Sun, 29 Mar 2020 12:53:11 +0000 (08:53 -0400)
committer Steven G. Johnson <stevenj@alum.mit.edu>
Sun, 29 Mar 2020 12:53:11 +0000 (08:53 -0400)
CMakeLists.txt
data/Makefile
test/graphemetest.c

index 12bfda1645af632161e9d6fa87b44346ef1ab1de..4c5649e2eb999e2148adb13ea3f726b638b9da20 100644 (file)
@@ -66,8 +66,7 @@ if(UTF8PROC_ENABLE_TESTING)
   file(MAKE_DIRECTORY data)
   set(UNICODE_VERSION 13.0.0)
   file(DOWNLOAD http://www.unicode.org/Public/${UNICODE_VERSION}/ucd/NormalizationTest.txt data/NormalizationTest.txt SHOW_PROGRESS)
-  file(DOWNLOAD http://www.unicode.org/Public/${UNICODE_VERSION}/ucd/auxiliary/GraphemeBreakTest.txt data/GraphemeBreakTestOrg.txt SHOW_PROGRESS)
-  execute_process(COMMAND bash -c "cat data/GraphemeBreakTestOrg.txt | /usr/bin/perl -pe 's,÷,/,g;s,×,+,g' && rm -f data/GraphemeBreakTestOrg.txt" OUTPUT_FILE data/GraphemeBreakTest.txt)
+  file(DOWNLOAD http://www.unicode.org/Public/${UNICODE_VERSION}/ucd/auxiliary/GraphemeBreakTest.txt data/GraphemeBreakTest.txt SHOW_PROGRESS)
   add_executable(case test/tests.h test/tests.c utf8proc.h test/case.c)
   target_link_libraries(case utf8proc)
   add_executable(custom test/tests.h test/tests.c utf8proc.h test/custom.c)
index 8c6470f8fe00d157d7fe32977d94f2a96bcb71d2..aeef269390e3964a4a72fe8d19ca2911b86e6818 100644 (file)
@@ -46,7 +46,7 @@ NormalizationTest.txt:
        $(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/NormalizationTest.txt
 
 GraphemeBreakTest.txt:
-       $(CURL) $(CURLFLAGS) $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/GraphemeBreakTest.txt | $(PERL) -pe 's,÷,/,g;s,×,+,g' > $@
+       $(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/GraphemeBreakTest.txt
 
 emoji-data.txt:
        $(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://unicode.org/Public/$(UNICODE_VERSION)/ucd/emoji/emoji-data.txt
index 337a0306ada0c48d0b9335c2cafb05bd78028f40..93a7d03d92c9fbb6e1b9ba2971b273d5b15d0ba9 100644 (file)
@@ -18,12 +18,12 @@ int main(int argc, char **argv)
 
         while (buf[bi]) {
             bi = skipspaces(buf, bi);
-            if (buf[bi] == '/') { /* grapheme break */
+            if ((uint8_t)buf[bi] == 0xc3 && (uint8_t)buf[bi+1] == 0xb7) { /* U+00f7 = grapheme break */
                 src[si++] = '/';
-                bi++;
+                bi += 2;
             }
-            else if (buf[bi] == '+') { /* no break */
-                bi++;
+            else if ((uint8_t)buf[bi] == 0xc3 && (uint8_t)buf[bi+1] == 0x97) { /* U+00d7 = no break */
+                bi += 2;
             }
             else if (buf[bi] == '#') { /* start of comments */
                 break;